import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { OpenAIEmbeddings } from '@langchain/openai';
import { PineconeStore } from '@langchain/pinecone';
import { pinecone } from '@/lib/utils/pinecone-client';
import { PINECONE_INDEX_NAME, PINECONE_NAME_SPACE } from '@/config/pinecone';
import { DirectoryLoader } from 'langchain/document_loaders/fs/directory';
import { PDFLoader } from 'langchain/document_loaders/fs/pdf';
// import { TextLoader } from 'langchain/document_loaders/fs/text';

const filePath = 'docs';

/**
 * Load PDF documents from a directory, split them into overlapping text
 * chunks, embed them with OpenAI, and upsert the vectors into Pinecone.
 *
 * @param {string} [docsDir=filePath] - Directory to scan for `.pdf` files.
 * @returns {Promise<void>}
 * @throws {Error} Wraps any loader/embedding/Pinecone failure; the original
 *   error is preserved on the `cause` property.
 */
export const run = async (docsDir = filePath) => {
  try {
    // Register a loader per file extension found in the directory.
    const directoryLoader = new DirectoryLoader(docsDir, {
      // '.txt': (path) => new TextLoader(path),
      '.pdf': (path) => new PDFLoader(path),
    });

    const rawDocs = await directoryLoader.load();

    /* Split text into chunks; the 200-char overlap preserves context across
       chunk boundaries for retrieval. */
    const textSplitter = new RecursiveCharacterTextSplitter({
      chunkSize: 1000,
      chunkOverlap: 200,
    });

    const docs = await textSplitter.splitDocuments(rawDocs);

    const embeddings = new OpenAIEmbeddings();
    const index = pinecone.Index(PINECONE_INDEX_NAME); // change to your own index name

    // To reuse an already-populated index instead of re-embedding:
    // const vectorStore = await PineconeStore.fromExistingIndex(
    //   new OpenAIEmbeddings(),
    //   { pineconeIndex: index, namespace: PINECONE_NAME_SPACE }
    // );

    // Embed the documents and upsert the vectors into Pinecone.
    await PineconeStore.fromDocuments(docs, embeddings, {
      pineconeIndex: index,
      maxConcurrency: 5,
      namespace: PINECONE_NAME_SPACE,
      textKey: 'text',
    });
  } catch (error) {
    console.error('error', error);
    // Keep the original failure reachable for callers via `cause`.
    throw new Error('Failed to ingest your data', { cause: error });
  }
};

// --- Alternative implementation (kept for reference): ingest sample documents into Redis instead of Pinecone ---
// import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
// import { OpenAIEmbeddings } from '@langchain/openai';
// import { createClient } from "redis";
// import { RedisVectorStore } from "@langchain/redis";
// import { DirectoryLoader } from 'langchain/document_loaders/fs/directory';
// import { PDFLoader } from 'langchain/document_loaders/fs/pdf';
// import { Document } from "@langchain/core/documents";

// const filePath = 'docs';

// export const run = async () => {
//   try {
//     // const directoryLoader = new DirectoryLoader(filePath, {
//     //   '.pdf': (path) => new PDFLoader(path),
//     // });
//     // const rawDocs = await directoryLoader.load();
//     // const textSplitter = new RecursiveCharacterTextSplitter({
//     //   chunkSize: 1000,
//     //   chunkOverlap: 200,
//     // });
//     // const docs = await textSplitter.splitDocuments(rawDocs);

//     const docs = [
//       new Document({
//         metadata: { foo: "bar" },
//         pageContent: "redis is fast",
//       }),
//       new Document({
//         metadata: { foo: "bar" },
//         pageContent: "the quick brown fox jumped over the lazy dog",
//       })
//     ];

//     const client = createClient({
//       url: process.env.REDIS_URL,
//       database: 1,
//       username: 'root',
//       password: '123456',
//     });
//     await client.connect();
//     const vectorStore = await RedisVectorStore.fromDocuments(
//       docs,
//       new OpenAIEmbeddings(),
//       {
//         redisClient: client,
//         indexName: "docs",
//       }
//     );
//     await client.disconnect();
//   } catch (error) {
//     console.log('error', error);
//     throw new Error('Failed to ingest your data');
//   }
// };

// Entry point: run the ingestion once, then report success. Failures are
// caught explicitly so the process exits non-zero instead of dying with an
// unhandled promise rejection.
(async () => {
  try {
    await run();
    console.log('ingestion complete');
  } catch (error) {
    console.error(error);
    process.exitCode = 1;
  }
})();
